{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -*- coding = utf-8 -*-\n",
    "import requests\n",
    "import csv\n",
    "from bs4 import BeautifulSoup\n",
    "import time\n",
    "def get_info(url):\n",
    "    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'}\n",
    "    try:\n",
    "        r = requests.get(url,headers = headers)\n",
    "    except Expection as e:\n",
    "        print(e)\n",
    "    soup = BeautifulSoup(r.text, 'html.parser')\n",
    "    house_list=soup.find_all('div',class_='content__list--item')\n",
    "    house_info=[]\n",
    "    for i in house_list:\n",
    "        single_list=[]\n",
    "        title=i.find('a',class_='content__list--item--aside')['title']\n",
    "        img_url=i.find('img')['data-src']\n",
    "        single_list.extend([title,img_url])\n",
    "        loc_list=house_list[0].find('p',class_='content__list--item--des').get_text().replace('\\n','').replace(' ','').split('/')\n",
    "        single_list.extend(loc_list)\n",
    "        try:\n",
    "            is_new=i.find('i',class_='content__item__tag--is_new').get_text()\n",
    "        except Exception as e:\n",
    "            print(e)\n",
    "            is_new='Not available'\n",
    "        finally:\n",
    "\n",
    "            online=i.find('p',class_='content__list--item--time oneline').get_text()\n",
    "            single_list.extend([is_new,price,online])\n",
    "            house_info.append(single_list)  \n",
    "    return house_info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "请输入要爬取的页数：5\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "page 1 is finished\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "page 2 is finished\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "page 3 is finished\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "page 4 is finished\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "'NoneType' object has no attribute 'get_text'\n",
      "page 5 is finished\n"
     ]
    }
   ],
   "source": [
    "num=int(input('请输入要爬取的页数：'))\n",
    "whole_list=[]\n",
    "for i in range(1,num+1):\n",
    "    url='https://xz.lianjia.com/zufang/pg{}/#contentList'.format(i)\n",
    "    list=get_info(url)\n",
    "    whole_list.append(list)\n",
    "    print('page {} is finished'.format(i))\n",
    "    time.sleep(3)\n",
    "with open('lianjia.csv','w',encoding='utf_8_sig') as f:\n",
    "    cw= csv.writer(f)\n",
    "    for item in whole_list:\n",
    "        for i in item:\n",
    "            cw.writerow(i) #将列表的每个元素写到csv文件的一行    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
